import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
run=pd.read_csv('plant_vase2.CSV')
run
| year | month | day | hour | minute | second | moisture0 | moisture1 | moisture2 | moisture3 | moisture4 | irrgation | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2020 | 3 | 11 | 14 | 44 | 39 | 0.59 | 0.63 | 0.51 | 0.45 | 0.01 | False |
| 1 | 2020 | 3 | 11 | 14 | 47 | 9 | 0.56 | 0.60 | 0.52 | 0.48 | 0.01 | False |
| 2 | 2020 | 3 | 11 | 14 | 49 | 39 | 0.56 | 0.54 | 0.54 | 0.51 | 0.01 | False |
| 3 | 2020 | 3 | 11 | 14 | 52 | 9 | 0.56 | 0.50 | 0.57 | 0.51 | 0.01 | False |
| 4 | 2020 | 3 | 11 | 14 | 54 | 39 | 0.57 | 0.53 | 0.58 | 0.51 | 0.01 | False |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 10284 | 2020 | 3 | 29 | 11 | 53 | 33 | 0.03 | 0.96 | 0.93 | 0.99 | 0.01 | False |
| 10285 | 2020 | 3 | 29 | 11 | 56 | 3 | 0.03 | 0.96 | 0.90 | 0.99 | 0.01 | False |
| 10286 | 2020 | 3 | 29 | 11 | 58 | 33 | 0.03 | 0.96 | 0.91 | 0.99 | 0.01 | False |
| 10287 | 2020 | 3 | 29 | 12 | 1 | 3 | 0.03 | 0.96 | 0.89 | 0.99 | 0.01 | False |
| 10288 | 2020 | 3 | 29 | 12 | 3 | 33 | 0.03 | 0.96 | 0.89 | 0.99 | 0.01 | False |
10289 rows × 12 columns
run.drop(columns=['irrgation'])
| year | month | day | hour | minute | second | moisture0 | moisture1 | moisture2 | moisture3 | moisture4 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2020 | 3 | 11 | 14 | 44 | 39 | 0.59 | 0.63 | 0.51 | 0.45 | 0.01 |
| 1 | 2020 | 3 | 11 | 14 | 47 | 9 | 0.56 | 0.60 | 0.52 | 0.48 | 0.01 |
| 2 | 2020 | 3 | 11 | 14 | 49 | 39 | 0.56 | 0.54 | 0.54 | 0.51 | 0.01 |
| 3 | 2020 | 3 | 11 | 14 | 52 | 9 | 0.56 | 0.50 | 0.57 | 0.51 | 0.01 |
| 4 | 2020 | 3 | 11 | 14 | 54 | 39 | 0.57 | 0.53 | 0.58 | 0.51 | 0.01 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 10284 | 2020 | 3 | 29 | 11 | 53 | 33 | 0.03 | 0.96 | 0.93 | 0.99 | 0.01 |
| 10285 | 2020 | 3 | 29 | 11 | 56 | 3 | 0.03 | 0.96 | 0.90 | 0.99 | 0.01 |
| 10286 | 2020 | 3 | 29 | 11 | 58 | 33 | 0.03 | 0.96 | 0.91 | 0.99 | 0.01 |
| 10287 | 2020 | 3 | 29 | 12 | 1 | 3 | 0.03 | 0.96 | 0.89 | 0.99 | 0.01 |
| 10288 | 2020 | 3 | 29 | 12 | 3 | 33 | 0.03 | 0.96 | 0.89 | 0.99 | 0.01 |
10289 rows × 11 columns
run.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 10289 entries, 0 to 10288 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 year 10289 non-null int64 1 month 10289 non-null int64 2 day 10289 non-null int64 3 hour 10289 non-null int64 4 minute 10289 non-null int64 5 second 10289 non-null int64 6 moisture0 10289 non-null float64 7 moisture1 10289 non-null float64 8 moisture2 10289 non-null float64 9 moisture3 10289 non-null float64 10 moisture4 10289 non-null float64 11 irrgation 10289 non-null bool dtypes: bool(1), float64(5), int64(6) memory usage: 894.4 KB
run.dtypes
year int64 month int64 day int64 hour int64 minute int64 second int64 moisture0 float64 moisture1 float64 moisture2 float64 moisture3 float64 moisture4 float64 irrgation bool dtype: object
print(list(run))
['year', 'month', 'day', 'hour', 'minute', 'second', 'moisture0', 'moisture1', 'moisture2', 'moisture3', 'moisture4', 'irrgation']
sns.lmplot(x="day", y="moisture4", hue="moisture4",palette="rocket", data=run)
plt.show()
sns.lmplot(x="hour", y="moisture4", hue="moisture4",palette="icefire", data=run)
plt.show()
sns.lmplot(x="minute", y="moisture4", hue="moisture4",palette="coolwarm",data=run)
plt.show()
sns.lmplot(x="hour", y="moisture4", hue="moisture4",palette="cubehelix",data=run)
plt.show()
plt.show()
sns.lmplot(x="moisture0", y="moisture4", hue="moisture4",palette="YlOrBr", data=run)
plt.show()
sns.lmplot(x="moisture1", y="moisture4", hue="moisture4",palette="Blues", data=run)
plt.show()
sns.lmplot(x="moisture2", y="moisture4", hue="moisture4",palette="viridis", data=run)
plt.show()
sns.lmplot(x="moisture3", y="moisture4", hue="moisture4",palette="rocket_r", data=run)
plt.show()
plt.show()
run[run.columns[1:]].corr()['moisture3'][:]
month NaN day 0.759488 hour -0.038817 minute -0.000235 second -0.005488 moisture0 -0.600088 moisture1 0.274376 moisture2 0.648850 moisture3 1.000000 moisture4 0.313640 irrgation NaN Name: moisture3, dtype: float64
corr = run.corr()
corr.style.background_gradient(cmap='coolwarm')
C:\Users\DELL\anaconda3\lib\site-packages\pandas\io\formats\style.py:2813: RuntimeWarning: All-NaN slice encountered smin = np.nanmin(gmap) if vmin is None else vmin C:\Users\DELL\anaconda3\lib\site-packages\pandas\io\formats\style.py:2814: RuntimeWarning: All-NaN slice encountered smax = np.nanmax(gmap) if vmax is None else vmax
| year | month | day | hour | minute | second | moisture0 | moisture1 | moisture2 | moisture3 | moisture4 | irrgation | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| year | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan |
| month | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan |
| day | nan | nan | 1.000000 | -0.081286 | -0.002565 | -0.003688 | -0.901576 | 0.446852 | 0.688953 | 0.759488 | 0.209223 | nan |
| hour | nan | nan | -0.081286 | 1.000000 | 0.002132 | -0.033278 | 0.008766 | -0.032911 | -0.061425 | -0.038817 | 0.038641 | nan |
| minute | nan | nan | -0.002565 | 0.002132 | 1.000000 | -0.001658 | 0.000744 | 0.000057 | -0.000239 | -0.000235 | -0.015091 | nan |
| second | nan | nan | -0.003688 | -0.033278 | -0.001658 | 1.000000 | 0.006799 | -0.004114 | -0.006818 | -0.005488 | -0.006834 | nan |
| moisture0 | nan | nan | -0.901576 | 0.008766 | 0.000744 | 0.006799 | 1.000000 | -0.223934 | -0.546708 | -0.600088 | -0.060366 | nan |
| moisture1 | nan | nan | 0.446852 | -0.032911 | 0.000057 | -0.004114 | -0.223934 | 1.000000 | 0.644527 | 0.274376 | 0.195880 | nan |
| moisture2 | nan | nan | 0.688953 | -0.061425 | -0.000239 | -0.006818 | -0.546708 | 0.644527 | 1.000000 | 0.648850 | 0.144897 | nan |
| moisture3 | nan | nan | 0.759488 | -0.038817 | -0.000235 | -0.005488 | -0.600088 | 0.274376 | 0.648850 | 1.000000 | 0.313640 | nan |
| moisture4 | nan | nan | 0.209223 | 0.038641 | -0.015091 | -0.006834 | -0.060366 | 0.195880 | 0.144897 | 0.313640 | 1.000000 | nan |
| irrgation | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan |
duplicate_rows_run = run[run.duplicated()]
run.head()
| year | month | day | hour | minute | second | moisture0 | moisture1 | moisture2 | moisture3 | moisture4 | irrgation | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2020 | 3 | 11 | 14 | 44 | 39 | 0.59 | 0.63 | 0.51 | 0.45 | 0.01 | False |
| 1 | 2020 | 3 | 11 | 14 | 47 | 9 | 0.56 | 0.60 | 0.52 | 0.48 | 0.01 | False |
| 2 | 2020 | 3 | 11 | 14 | 49 | 39 | 0.56 | 0.54 | 0.54 | 0.51 | 0.01 | False |
| 3 | 2020 | 3 | 11 | 14 | 52 | 9 | 0.56 | 0.50 | 0.57 | 0.51 | 0.01 | False |
| 4 | 2020 | 3 | 11 | 14 | 54 | 39 | 0.57 | 0.53 | 0.58 | 0.51 | 0.01 | False |
run[['moisture0', 'moisture1','moisture2', 'moisture3','moisture4']].plot()
<AxesSubplot:>
run.drop(['irrgation','year','month'],axis=1,inplace=True)
run.isnull().sum()
day 0 hour 0 minute 0 second 0 moisture0 0 moisture1 0 moisture2 0 moisture3 0 moisture4 0 dtype: int64
X = run.drop('moisture4',axis=1)
X.head()
| day | hour | minute | second | moisture0 | moisture1 | moisture2 | moisture3 | |
|---|---|---|---|---|---|---|---|---|
| 0 | 11 | 14 | 44 | 39 | 0.59 | 0.63 | 0.51 | 0.45 |
| 1 | 11 | 14 | 47 | 9 | 0.56 | 0.60 | 0.52 | 0.48 |
| 2 | 11 | 14 | 49 | 39 | 0.56 | 0.54 | 0.54 | 0.51 |
| 3 | 11 | 14 | 52 | 9 | 0.56 | 0.50 | 0.57 | 0.51 |
| 4 | 11 | 14 | 54 | 39 | 0.57 | 0.53 | 0.58 | 0.51 |
y = run['moisture4']
y.head()
0 0.01 1 0.01 2 0.01 3 0.01 4 0.01 Name: moisture4, dtype: float64
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
sc = StandardScaler()
X = sc.fit_transform(X)
X
array([[-1.74916072, 0.36177781, 0.83927869, ..., -0.47847994,
-3.19532076, -2.72890505],
[-1.74916072, 0.36177781, 1.01241716, ..., -0.6145305 ,
-3.09231758, -2.52112266],
[-1.74916072, 0.36177781, 1.1278428 , ..., -0.88663161,
-2.88631124, -2.31334026],
...,
[ 1.72581932, -0.07036474, 1.64725818, ..., 1.01807619,
0.9248061 , 1.01117803],
[ 1.72581932, 0.07368278, -1.64237259, ..., 1.01807619,
0.71879976, 1.01117803],
[ 1.72581932, 0.07368278, -1.52694695, ..., 1.01807619,
0.71879976, 1.01117803]])
from sklearn import metrics
X.shape
(10289, 8)
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=42)
y_test
1825 0.01
7046 0.01
6186 0.00
2025 0.01
5414 0.00
...
3945 0.01
164 0.01
6586 0.01
2715 0.01
9179 0.01
Name: moisture4, Length: 3087, dtype: float64
def predict(algorithm):
model = algorithm.fit(X_train,y_train)
print('Training Score: {}'.format(model.score(X_train,y_train)))
print('Test Accuracy: {}'.format(model.score(X_test, y_test)))
preds = model.predict(X_test)
print('Predictions are: {}'.format(preds))
print('\n')
r2_score = metrics.r2_score(y_test,preds)
print('r2_score is:{}'.format(r2_score))
print('MAE:',metrics.mean_absolute_error(y_test,preds))
print('MSE:',metrics.mean_squared_error(y_test,preds))
print('RMSE:',np.sqrt(metrics.mean_squared_error(y_test,preds)))
sns.distplot(y_test-preds,color='red')
from sklearn.metrics import accuracy_score as score
from sklearn.linear_model import LinearRegression
predict(LinearRegression())
Training Score: 0.1799268532108098 Test Accuracy: 0.1914286131772025 Predictions are: [0.0081312 0.00483921 0.00675845 ... 0.0061497 0.00912347 0.01056536] r2_score is:0.1914286131772025 MAE: 0.0029209458009128264 MSE: 1.4439294462881086e-05 RMSE: 0.003799907165034573
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
ln_model = LinearRegression()
ln_model.fit(X_train, y_train)
preds1 = ln_model.predict(X_test)
preds1
array([0.0081312 , 0.00483921, 0.00675845, ..., 0.0061497 , 0.00912347,
0.01056536])
import plotly.express as px
fig = px.scatter(x=y_test, y=preds1, labels={'x': 'True Moisture', 'y': 'Predicted Moisture'})
fig.add_shape(
type="line", line=dict(dash='dash'),
x0=y.min(), y0=y.min(),
x1=y.max(), y1=y.max()
)
fig.show()
from sklearn.ensemble import RandomForestRegressor
predict(RandomForestRegressor())
Training Score: 0.9047847287002861 Test Accuracy: 0.3379567088972071 Predictions are: [0.0064 0.01 0.009 ... 0.009 0.01 0.01 ] r2_score is:0.3379567088972071 MAE: 0.002115743440233237 MSE: 1.1822627146096536e-05 RMSE: 0.0034384047385519548
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
rf = RandomForestRegressor()
rf.fit(X_train, y_train)
preds2 = rf.predict(X_test)
preds2
array([0.0056, 0.0097, 0.0086, ..., 0.0092, 0.01 , 0.01 ])
import plotly.express as px
fig = px.scatter(x=y_test, y=preds2, labels={'x': 'True Moisture', 'y': 'Predicted Moisture'})
fig.add_shape(
type="line", line=dict(dash='dash'),
x0=y.min(), y0=y.min(),
x1=y.max(), y1=y.max()
)
fig.show()
from sklearn.neighbors import KNeighborsRegressor
predict(KNeighborsRegressor())
Training Score: 0.48203501320363396 Test Accuracy: 0.2580765908436512 Predictions are: [0.006 0.01 0.008 ... 0.01 0.01 0.01 ] r2_score is:0.2580765908436512 MAE: 0.0022073210236475543 MSE: 1.3249109167476514e-05 RMSE: 0.0036399325773256454
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
knn = KNeighborsRegressor()
knn.fit(X_train, y_train)
preds3 = knn.predict(X_test)
preds3
array([0.006, 0.01 , 0.008, ..., 0.01 , 0.01 , 0.01 ])
import plotly.express as px
fig = px.scatter(x=y_test, y=preds3, labels={'x': 'True Moisture', 'y': 'Predicted Moisture'})
fig.add_shape(
type="line", line=dict(dash='dash'),
x0=y.min(), y0=y.min(),
x1=y.max(), y1=y.max()
)
fig.show()
from sklearn.tree import DecisionTreeRegressor
predict(DecisionTreeRegressor())
Training Score: 1.0 Test Accuracy: -0.12286207889432754 Predictions are: [0. 0.01 0.01 ... 0.01 0.01 0.01] r2_score is:-0.12286207889432754 MAE: 0.0020051830255911913 MSE: 2.005183025591189e-05 RMSE: 0.0044779270043081194
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
dt = DecisionTreeRegressor()
dt.fit(X_train, y_train)
preds4 = dt.predict(X_test)
preds4
array([0.01, 0.01, 0.01, ..., 0.01, 0.01, 0.01])
import plotly.express as px
fig = px.scatter(x=y_test, y=preds4, labels={'x': 'True Moisture', 'y': 'Predicted Moisture'})
fig.add_shape(
type="line", line=dict(dash='dash'),
x0=y.min(), y0=y.min(),
x1=y.max(), y1=y.max()
)
fig.show()
from xgboost.sklearn import XGBRegressor
predict( XGBRegressor())
Training Score: 0.714357284952877 Test Accuracy: 0.3272909589754486 Predictions are: [0.00922653 0.01143963 0.00779632 ... 0.00851323 0.00929431 0.00961161] r2_score is:0.3272909589754486 MAE: 0.0021950082869981423 MSE: 1.201309382139267e-05 RMSE: 0.003465991030195068
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
xgb = XGBRegressor()
xgb.fit(X_train, y_train)
preds5 = xgb.predict(X_test)
preds5
array([0.00922653, 0.01143963, 0.00779632, ..., 0.00851323, 0.00929431,
0.00961161], dtype=float32)
import plotly.express as px
fig = px.scatter(x=y_test, y=preds5, labels={'x': 'True Moisture', 'y': 'Predicted Moisture'})
fig.add_shape(
type="line", line=dict(dash='dash'),
x0=y.min(), y0=y.min(),
x1=y.max(), y1=y.max()
)
fig.show()
from sklearn.tree import DecisionTreeRegressor
predict(DecisionTreeRegressor())
Training Score: 1.0 Test Accuracy: -0.15188597754103061 Predictions are: [0. 0.01 0.01 ... 0.01 0.01 0.01] r2_score is:-0.15188597754103061 MAE: 0.0020570132815030803 MSE: 2.0570132815030774e-05 RMSE: 0.0045354308301451115
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
dt = KNeighborsRegressor()
dt.fit(X_train, y_train)
preds4 = dt.predict(X_test)
preds4
array([0.006, 0.01 , 0.008, ..., 0.01 , 0.01 , 0.01 ])
import plotly.express as px
fig = px.scatter(x=y_test, y=preds4, labels={'x': 'True Moisture', 'y': 'Predicted Moisture'})
fig.add_shape(
type="line", line=dict(dash='dash'),
x0=y.min(), y0=y.min(),
x1=y.max(), y1=y.max()
)
fig.show()